Face Recognition - In this project, we use a model pre-trained for face recognition to recognise similar faces. In particular, we are interested in determining whether two given faces belong to the same person.
# imports
# !pip install opencv-python
# !pip install scikit-image
import os
import re
import random
import warnings
from time import time
from math import floor
from pathlib import Path
import pandas as pd, numpy as np
from pprint import pprint
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import trange, tqdm
from collections import defaultdict
import tensorflow as tf
import cv2
import skimage
from plantcv import plantcv as pcv
from skimage.io import imread, imshow
from skimage.color import rgb2gray, rgb2hsv, gray2rgb
from skimage.filters import sobel, threshold_otsu, gaussian, unsharp_mask
from skimage.feature import canny
from skimage.measure import find_contours
from skimage.morphology import binary_dilation, dilation
warnings.filterwarnings('ignore')
%matplotlib inline
# reproducibility
seed = 1996
random.seed(seed)
data_dir = Path('./data/Part 3 - Aligned Face Dataset from Pinterest/PINS/')
# One sub-directory per person. os.listdir() returns entries in arbitrary
# order, so sort them to keep the person order stable across runs/platforms,
# and skip stray files that are not person folders (the code below calls
# os.listdir() on every entry and would fail on a plain file).
PEOPLE = sorted(p for p in os.listdir(data_dir) if (data_dir / p).is_dir())
NUM_PEOPLE = len(PEOPLE)
def clean_name(text):
    '''Turn a dataset folder name into a display name.

    Removes every occurrence of ``pins_``, ``face`` and ``_`` from ``text``,
    title-cases what is left and strips surrounding whitespace, so that
    removing a trailing "face" does not leave a dangling space in the label.

    Returns the cleaned name as a ``str``.
    '''
    text = re.sub('pins_', '', text)
    text = re.sub('face', '', text)
    text = re.sub('_', '', text)
    # strip last: the substitutions above can leave leading/trailing blanks
    return text.title().strip()
# some degree of class imbalance but not too much
for person in PEOPLE:
    name = clean_name(person)
    n_images = len(os.listdir(data_dir / person))
    # left-align the name in a 25-character column so the counts line up
    print(f"{name:<25} : {n_images}")
print(f'\nTotal: {NUM_PEOPLE}')  # Face Aligned Images from 100 people gathered from Pinterest
Aaron Paul : 86 Alexandra Daddario : 165 Alvaro Morte : 91 Alycia Debnam Carey : 144 Amanda Crew : 118 Amaury Nolasco : 113 Amber Heard : 151 Anna Gunn : 66 Anne Hathaway : 151 Barbara Palvin : 142 Bellamy Blake : 89 Benedict Cumberbatch : 81 Betsy Brandt : 68 Bill Gates : 86 Brenton Thwaites : 130 Brie Larson : 128 Brit Marling : 122 Bryan Cranston : 80 Caity Lotz : 123 Cameron Monaghan : 112 Chadwick Boseman : 119 Chance Perdomo : 86 Chris Evans : 88 Chris Pratt : 141 Cobie Smulders : 130 Danielle Panabaker : 115 Dave Franco : 126 David Mazouz : 104 Dominic Purcell : 107 Drake : 38 Dua Lipa : 137 Dwayne Johnson : 124 Eliza Taylor : 105 Elizabeth Olsen : 181 Elon Musk : 85 Emilia Clarke : 154 Emily Bett Rickards : 76 Emma Stone : 128 Emma Watson : 163 Gal Gadot : 158 Grant Gustin : 122 Gwyneth Paltrow : 121 Henry Cavil : 134 Jason Isaacs : 125 Jason Momoa : 133 Jeff Bezos : 88 Jeremy Renner : 119 Jesse Eisenberg : 93 Jim Parsons : 109 Jon Bernthal : 61 Josh Radnor : 101 Kiernan Shipka : 167 Kit Harington : 105 Kristen Stewart : 118 Krysten Ritter : 115 Kumail Nanjiani : 90 Lindsey Morgan : 76 Maisie Williams : 148 Margot Robbie : 140 Maria Pedraza : 68 Mark Ruffalo : 118 Mark Zuckerberg : 62 Martin Starr : 48 Melissa Benoit : 122 Miguel Herran : 81 Mike Colter : 71 Millie Bobby Brown : 82 Morena Baccarin : 132 Morgan Freeman : 97 Natalie Portman : 117 Neil Patrick Harris : 73 Paul Rudd : 127 Pedro Alonso : 77 Peter Dinklage : 94 Rami Melek : 75 Rihanna : 120 Rj Mitte : 71 Robert Downey Jr : 107 Robert Knepper : 95 Robin Taylor : 99 Ryan Reynolds : 97 Sarah Wayne Callies : 120 Scarlett Johansson : 146 Sean Pertwee : 82 Sebastian Stan : 107 Selena Gomez : 93 Shakira : 50 Sophie Turner : 121 Stephen Amell : 100 Sundar Pichai : 89 Tati Gabrielle : 65 Taylor Swift : 99 Thomas Middleditch : 82 Tom Cavanagh : 100 Tom Holland : 119 Ursula Corbero : 80 Wentworth Miller : 113 Willa Holland : 147 William Fichtner : 139 Zendaya : 109 Total: 100
def load_dataset(data_dir=data_dir):
    '''Read every image of every person in PEOPLE into memory.

    Returns a pair of plain dicts:
      * a flat view with two parallel lists, ``'labels'`` (cleaned names)
        and ``'images'`` (the loaded arrays);
      * a per-person view mapping each cleaned name to its list of images.
    '''
    flat = defaultdict(list)
    by_person = defaultdict(list)
    for folder in PEOPLE:
        name = clean_name(folder)  # METADATA: Name (Cleaned)
        person_dir = data_dir / folder
        # one progress bar per person, labelled with the padded name
        progress = tqdm(os.listdir(person_dir), desc=f"{name:<25}", ncols=100)
        for file_name in progress:
            picture = imread(person_dir / file_name)
            flat['labels'].append(name)
            flat['images'].append(picture)  # Associated Image
            by_person[name].append(picture)
    return dict(flat), dict(by_person)
data, images = load_dataset()
Aaron Paul : 100%|███████████████████████████████████| 86/86 [00:00<00:00, 597.22it/s] Alexandra Daddario : 100%|█████████████████████████████████| 165/165 [00:00<00:00, 627.38it/s] Alvaro Morte : 100%|███████████████████████████████████| 91/91 [00:00<00:00, 674.08it/s] Alycia Debnam Carey : 100%|█████████████████████████████████| 144/144 [00:00<00:00, 566.93it/s] Amanda Crew : 100%|█████████████████████████████████| 118/118 [00:00<00:00, 674.27it/s] Amaury Nolasco : 100%|█████████████████████████████████| 113/113 [00:00<00:00, 604.27it/s] Amber Heard : 100%|█████████████████████████████████| 151/151 [00:00<00:00, 683.26it/s] Anna Gunn : 100%|███████████████████████████████████| 66/66 [00:00<00:00, 647.08it/s] Anne Hathaway : 100%|█████████████████████████████████| 151/151 [00:00<00:00, 634.46it/s] Barbara Palvin : 100%|█████████████████████████████████| 142/142 [00:00<00:00, 631.12it/s] Bellamy Blake : 100%|███████████████████████████████████| 89/89 [00:00<00:00, 566.89it/s] Benedict Cumberbatch : 100%|███████████████████████████████████| 81/81 [00:00<00:00, 704.50it/s] Betsy Brandt : 100%|███████████████████████████████████| 68/68 [00:00<00:00, 747.27it/s] Bill Gates : 100%|███████████████████████████████████| 86/86 [00:00<00:00, 705.02it/s] Brenton Thwaites : 100%|█████████████████████████████████| 130/130 [00:00<00:00, 718.24it/s] Brie Larson : 100%|█████████████████████████████████| 128/128 [00:00<00:00, 684.53it/s] Brit Marling : 100%|█████████████████████████████████| 122/122 [00:00<00:00, 713.46it/s] Bryan Cranston : 100%|███████████████████████████████████| 80/80 [00:00<00:00, 769.23it/s] Caity Lotz : 100%|█████████████████████████████████| 123/123 [00:00<00:00, 710.98it/s] Cameron Monaghan : 100%|█████████████████████████████████| 112/112 [00:00<00:00, 727.35it/s] Chadwick Boseman : 100%|█████████████████████████████████| 119/119 [00:00<00:00, 716.87it/s] Chance Perdomo : 100%|███████████████████████████████████| 86/86 [00:00<00:00, 735.03it/s] Chris 
Evans : 100%|███████████████████████████████████| 88/88 [00:00<00:00, 633.09it/s] Chris Pratt : 100%|█████████████████████████████████| 141/141 [00:00<00:00, 712.13it/s] Cobie Smulders : 100%|█████████████████████████████████| 130/130 [00:00<00:00, 718.24it/s] Danielle Panabaker : 100%|█████████████████████████████████| 115/115 [00:00<00:00, 746.76it/s] Dave Franco : 100%|█████████████████████████████████| 126/126 [00:00<00:00, 741.33it/s] David Mazouz : 100%|█████████████████████████████████| 104/104 [00:00<00:00, 675.32it/s] Dominic Purcell : 100%|█████████████████████████████████| 107/107 [00:00<00:00, 644.61it/s] Drake : 100%|███████████████████████████████████| 38/38 [00:00<00:00, 666.66it/s] Dua Lipa : 100%|█████████████████████████████████| 137/137 [00:00<00:00, 698.96it/s] Dwayne Johnson : 100%|█████████████████████████████████| 124/124 [00:00<00:00, 774.99it/s] Eliza Taylor : 100%|█████████████████████████████████| 105/105 [00:00<00:00, 719.17it/s] Elizabeth Olsen : 100%|█████████████████████████████████| 181/181 [00:00<00:00, 718.26it/s] Elon Musk : 100%|███████████████████████████████████| 85/85 [00:00<00:00, 680.02it/s] Emilia Clarke : 100%|█████████████████████████████████| 154/154 [00:00<00:00, 747.58it/s] Emily Bett Rickards : 100%|███████████████████████████████████| 76/76 [00:00<00:00, 716.99it/s] Emma Stone : 100%|█████████████████████████████████| 128/128 [00:00<00:00, 744.19it/s] Emma Watson : 100%|█████████████████████████████████| 163/163 [00:00<00:00, 702.55it/s] Gal Gadot : 100%|█████████████████████████████████| 158/158 [00:00<00:00, 774.51it/s] Grant Gustin : 100%|█████████████████████████████████| 122/122 [00:00<00:00, 721.88it/s] Gwyneth Paltrow : 100%|█████████████████████████████████| 121/121 [00:00<00:00, 729.25it/s] Henry Cavil : 100%|█████████████████████████████████| 134/134 [00:00<00:00, 708.99it/s] Jason Isaacs : 100%|█████████████████████████████████| 125/125 [00:00<00:00, 710.28it/s] Jason Momoa : 
100%|█████████████████████████████████| 133/133 [00:00<00:00, 711.22it/s] Jeff Bezos : 100%|███████████████████████████████████| 88/88 [00:00<00:00, 721.32it/s] Jeremy Renner : 100%|█████████████████████████████████| 119/119 [00:00<00:00, 716.79it/s] Jesse Eisenberg : 100%|███████████████████████████████████| 93/93 [00:00<00:00, 738.12it/s] Jim Parsons : 100%|█████████████████████████████████| 109/109 [00:00<00:00, 736.55it/s] Jon Bernthal : 100%|███████████████████████████████████| 61/61 [00:00<00:00, 726.21it/s] Josh Radnor : 100%|█████████████████████████████████| 101/101 [00:00<00:00, 737.27it/s] Kiernan Shipka : 100%|█████████████████████████████████| 167/167 [00:00<00:00, 748.89it/s] Kit Harington : 100%|█████████████████████████████████| 105/105 [00:00<00:00, 719.28it/s] Kristen Stewart : 100%|█████████████████████████████████| 118/118 [00:00<00:00, 706.59it/s] Krysten Ritter : 100%|█████████████████████████████████| 115/115 [00:00<00:00, 778.34it/s] Kumail Nanjiani : 100%|███████████████████████████████████| 90/90 [00:00<00:00, 708.66it/s] Lindsey Morgan : 100%|███████████████████████████████████| 76/76 [00:00<00:00, 745.17it/s] Maisie Williams : 100%|█████████████████████████████████| 148/148 [00:00<00:00, 774.86it/s] Margot Robbie : 100%|█████████████████████████████████| 140/140 [00:00<00:00, 717.95it/s] Maria Pedraza : 100%|███████████████████████████████████| 68/68 [00:00<00:00, 708.35it/s] Mark Ruffalo : 100%|█████████████████████████████████| 118/118 [00:00<00:00, 771.23it/s] Mark Zuckerberg : 100%|███████████████████████████████████| 62/62 [00:00<00:00, 746.99it/s] Martin Starr : 100%|███████████████████████████████████| 48/48 [00:00<00:00, 705.88it/s] Melissa Benoit : 100%|█████████████████████████████████| 122/122 [00:00<00:00, 739.39it/s] Miguel Herran : 100%|███████████████████████████████████| 81/81 [00:00<00:00, 750.11it/s] Mike Colter : 100%|███████████████████████████████████| 71/71 [00:00<00:00, 747.37it/s] Millie Bobby Brown : 
100%|███████████████████████████████████| 82/82 [00:00<00:00, 694.79it/s] Morena Baccarin : 100%|█████████████████████████████████| 132/132 [00:00<00:00, 705.97it/s] Morgan Freeman : 100%|███████████████████████████████████| 97/97 [00:00<00:00, 713.31it/s] Natalie Portman : 100%|█████████████████████████████████| 117/117 [00:00<00:00, 735.84it/s] Neil Patrick Harris : 100%|███████████████████████████████████| 73/73 [00:00<00:00, 688.70it/s] Paul Rudd : 100%|█████████████████████████████████| 127/127 [00:00<00:00, 705.58it/s] Pedro Alonso : 100%|███████████████████████████████████| 77/77 [00:00<00:00, 712.98it/s] Peter Dinklage : 100%|███████████████████████████████████| 94/94 [00:00<00:00, 717.65it/s] Rami Melek : 100%|███████████████████████████████████| 75/75 [00:00<00:00, 757.70it/s] Rihanna : 100%|█████████████████████████████████| 120/120 [00:00<00:00, 722.98it/s] Rj Mitte : 100%|███████████████████████████████████| 71/71 [00:00<00:00, 755.54it/s] Robert Downey Jr : 100%|█████████████████████████████████| 107/107 [00:00<00:00, 718.11it/s] Robert Knepper : 100%|███████████████████████████████████| 95/95 [00:00<00:00, 725.20it/s] Robin Taylor : 100%|███████████████████████████████████| 99/99 [00:00<00:00, 750.09it/s] Ryan Reynolds : 100%|███████████████████████████████████| 97/97 [00:00<00:00, 713.23it/s] Sarah Wayne Callies : 100%|█████████████████████████████████| 120/120 [00:00<00:00, 697.69it/s] Scarlett Johansson : 100%|█████████████████████████████████| 146/146 [00:00<00:00, 744.89it/s] Sean Pertwee : 100%|███████████████████████████████████| 82/82 [00:00<00:00, 733.12it/s] Sebastian Stan : 100%|█████████████████████████████████| 107/107 [00:00<00:00, 681.54it/s] Selena Gomez : 100%|███████████████████████████████████| 93/93 [00:00<00:00, 738.10it/s] Shakira : 100%|███████████████████████████████████| 50/50 [00:00<00:00, 714.27it/s] Sophie Turner : 100%|█████████████████████████████████| 121/121 [00:00<00:00, 756.24it/s] Stephen Amell : 
100%|█████████████████████████████████| 100/100 [00:00<00:00, 684.93it/s] Sundar Pichai : 100%|███████████████████████████████████| 89/89 [00:00<00:00, 741.59it/s] Tati Gabrielle : 100%|███████████████████████████████████| 65/65 [00:00<00:00, 755.93it/s] Taylor Swift : 100%|███████████████████████████████████| 99/99 [00:00<00:00, 738.79it/s] Thomas Middleditch : 100%|███████████████████████████████████| 82/82 [00:00<00:00, 732.14it/s] Tom Cavanagh : 100%|█████████████████████████████████| 100/100 [00:00<00:00, 709.22it/s] Tom Holland : 100%|█████████████████████████████████| 119/119 [00:00<00:00, 730.05it/s] Ursula Corbero : 100%|███████████████████████████████████| 80/80 [00:00<00:00, 747.80it/s] Wentworth Miller : 100%|█████████████████████████████████| 113/113 [00:00<00:00, 660.82it/s] Willa Holland : 100%|█████████████████████████████████| 147/147 [00:00<00:00, 703.35it/s] William Fichtner : 100%|█████████████████████████████████| 139/139 [00:00<00:00, 735.44it/s] Zendaya : 100%|█████████████████████████████████| 109/109 [00:00<00:00, 736.48it/s]
# total dataset of 10770 images
len(data['images'])
10770
data.keys() # all images in two arrays with associated labels (names)
dict_keys(['labels', 'images'])
images.keys() # to access images of people using Name directly
dict_keys(['Aaron Paul', 'Alexandra Daddario', 'Alvaro Morte', 'Alycia Debnam Carey ', 'Amanda Crew', 'Amaury Nolasco', 'Amber Heard ', 'Anna Gunn', 'Anne Hathaway', 'Barbara Palvin ', 'Bellamy Blake ', 'Benedict Cumberbatch', 'Betsy Brandt', 'Bill Gates', 'Brenton Thwaites', 'Brie Larson', 'Brit Marling', 'Bryan Cranston', 'Caity Lotz', 'Cameron Monaghan', 'Chadwick Boseman ', 'Chance Perdomo', 'Chris Evans', 'Chris Pratt', 'Cobie Smulders', 'Danielle Panabaker', 'Dave Franco', 'David Mazouz', 'Dominic Purcell', 'Drake', 'Dua Lipa ', 'Dwayne Johnson', 'Eliza Taylor', 'Elizabeth Olsen ', 'Elon Musk', 'Emilia Clarke', 'Emily Bett Rickards', 'Emma Stone', 'Emma Watson ', 'Gal Gadot ', 'Grant Gustin ', 'Gwyneth Paltrow', 'Henry Cavil', 'Jason Isaacs', 'Jason Momoa', 'Jeff Bezos', 'Jeremy Renner', 'Jesse Eisenberg', 'Jim Parsons', 'Jon Bernthal', 'Josh Radnor', 'Kiernan Shipka', 'Kit Harington', 'Kristen Stewart ', 'Krysten Ritter', 'Kumail Nanjiani', 'Lindsey Morgan ', 'Maisie Williams', 'Margot Robbie ', 'Maria Pedraza', 'Mark Ruffalo', 'Mark Zuckerberg', 'Martin Starr', 'Melissa Benoit', 'Miguel Herran', 'Mike Colter', 'Millie Bobby Brown', 'Morena Baccarin', 'Morgan Freeman', 'Natalie Portman', 'Neil Patrick Harris', 'Paul Rudd', 'Pedro Alonso', 'Peter Dinklage', 'Rami Melek', 'Rihanna', 'Rj Mitte', 'Robert Downey Jr ', 'Robert Knepper', 'Robin Taylor', 'Ryan Reynolds', 'Sarah Wayne Callies', 'Scarlett Johansson', 'Sean Pertwee', 'Sebastian Stan', 'Selena Gomez', 'Shakira', 'Sophie Turner', 'Stephen Amell', 'Sundar Pichai', 'Tati Gabrielle', 'Taylor Swift', 'Thomas Middleditch', 'Tom Cavanagh', 'Tom Holland ', 'Ursula Corbero', 'Wentworth Miller', 'Willa Holland', 'William Fichtner', 'Zendaya'])
def display(img, label, fontsize=18, cmap=None, shape=True):
    '''Show one image with ``label`` as its title.

    img      : image array handed to ``plt.imshow``
    label    : text used as the plot title
    fontsize : font size of the title
    cmap     : optional colormap forwarded to ``plt.imshow``
    shape    : when true, print ``img.shape`` after the plot
    '''
    # cmap=None is imshow's own default, so no branching is needed
    plt.imshow(img, cmap=cmap)
    plt.axis('off')
    # honour the caller's fontsize instead of a hard-coded value
    plt.title(f'{label}', fontsize=fontsize)
    plt.show()
    if shape:
        print(f'Shape: {img.shape}')
# draw a random index into the flat image/label lists
ind = random.choice(range(len(data['images'])))
# display random image from our dataset
display(img=data['images'][ind], label=data['labels'][ind])
Shape: (299, 299, 3)
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import (Dropout,
Flatten,
Activation,
ZeroPadding2D,
Convolution2D,
MaxPooling2D)
def vgg():
    '''Assemble the VGG-style network as a Keras ``Sequential`` model.

    Five blocks of zero-padded 3x3 convolutions, each closed by a 2x2
    max-pool, followed by a convolutional head ending in a 2622-way softmax.
    Only the architecture is built here; no weights are loaded.
    '''
    # (filters, number of padded 3x3 conv layers) for each block
    conv_blocks = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))

    net = Sequential()
    # only the very first layer declares the input shape
    pad_kwargs = {'input_shape': (224, 224, 3)}
    for filters, depth in conv_blocks:
        for _ in range(depth):
            net.add(ZeroPadding2D((1, 1), **pad_kwargs))
            pad_kwargs = {}
            net.add(Convolution2D(filters, (3, 3), activation='relu'))
        net.add(MaxPooling2D((2, 2), strides=(2, 2)))

    # classifier head expressed as convolutions
    net.add(Convolution2D(4096, (7, 7), activation='relu'))
    net.add(Dropout(0.5))
    net.add(Convolution2D(4096, (1, 1), activation='relu'))
    net.add(Dropout(0.5))
    net.add(Convolution2D(2622, (1, 1)))
    net.add(Flatten())
    net.add(Activation('softmax'))
    return net
# load the pretrained weights
# vgg() only builds the architecture; the weights are read from the .h5 file
vgg_model = vgg()
vgg_model.load_weights('./data/Part 3 - vgg_face_weights.h5')
from keras.utils.layer_utils import count_params
def get_params(model):
    '''Print the total, trainable and non-trainable parameter counts of ``model``.'''
    n_trainable = count_params(model.trainable_weights)
    n_frozen = count_params(model.non_trainable_weights)
    n_total = n_trainable + n_frozen
    # the ',' format spec inserts thousands separators
    print(f'Total params: {n_total:,}')
    print(f'Trainable params: {n_trainable:,}')
    print(f'Non-trainable params: {n_frozen:,}')
get_params(vgg_model)
Total params: 145,002,878 Trainable params: 145,002,878 Non-trainable params: 0
Using TensorFlow backend.
vgg_model.summary()
Model: "sequential" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= zero_padding2d (ZeroPadding2 (None, 226, 226, 3) 0 _________________________________________________________________ conv2d (Conv2D) (None, 224, 224, 64) 1792 _________________________________________________________________ zero_padding2d_1 (ZeroPaddin (None, 226, 226, 64) 0 _________________________________________________________________ conv2d_1 (Conv2D) (None, 224, 224, 64) 36928 _________________________________________________________________ max_pooling2d (MaxPooling2D) (None, 112, 112, 64) 0 _________________________________________________________________ zero_padding2d_2 (ZeroPaddin (None, 114, 114, 64) 0 _________________________________________________________________ conv2d_2 (Conv2D) (None, 112, 112, 128) 73856 _________________________________________________________________ zero_padding2d_3 (ZeroPaddin (None, 114, 114, 128) 0 _________________________________________________________________ conv2d_3 (Conv2D) (None, 112, 112, 128) 147584 _________________________________________________________________ max_pooling2d_1 (MaxPooling2 (None, 56, 56, 128) 0 _________________________________________________________________ zero_padding2d_4 (ZeroPaddin (None, 58, 58, 128) 0 _________________________________________________________________ conv2d_4 (Conv2D) (None, 56, 56, 256) 295168 _________________________________________________________________ zero_padding2d_5 (ZeroPaddin (None, 58, 58, 256) 0 _________________________________________________________________ conv2d_5 (Conv2D) (None, 56, 56, 256) 590080 _________________________________________________________________ zero_padding2d_6 (ZeroPaddin (None, 58, 58, 256) 0 _________________________________________________________________ conv2d_6 (Conv2D) (None, 56, 56, 256) 590080 
_________________________________________________________________ max_pooling2d_2 (MaxPooling2 (None, 28, 28, 256) 0 _________________________________________________________________ zero_padding2d_7 (ZeroPaddin (None, 30, 30, 256) 0 _________________________________________________________________ conv2d_7 (Conv2D) (None, 28, 28, 512) 1180160 _________________________________________________________________ zero_padding2d_8 (ZeroPaddin (None, 30, 30, 512) 0 _________________________________________________________________ conv2d_8 (Conv2D) (None, 28, 28, 512) 2359808 _________________________________________________________________ zero_padding2d_9 (ZeroPaddin (None, 30, 30, 512) 0 _________________________________________________________________ conv2d_9 (Conv2D) (None, 28, 28, 512) 2359808 _________________________________________________________________ max_pooling2d_3 (MaxPooling2 (None, 14, 14, 512) 0 _________________________________________________________________ zero_padding2d_10 (ZeroPaddi (None, 16, 16, 512) 0 _________________________________________________________________ conv2d_10 (Conv2D) (None, 14, 14, 512) 2359808 _________________________________________________________________ zero_padding2d_11 (ZeroPaddi (None, 16, 16, 512) 0 _________________________________________________________________ conv2d_11 (Conv2D) (None, 14, 14, 512) 2359808 _________________________________________________________________ zero_padding2d_12 (ZeroPaddi (None, 16, 16, 512) 0 _________________________________________________________________ conv2d_12 (Conv2D) (None, 14, 14, 512) 2359808 _________________________________________________________________ max_pooling2d_4 (MaxPooling2 (None, 7, 7, 512) 0 _________________________________________________________________ conv2d_13 (Conv2D) (None, 1, 1, 4096) 102764544 _________________________________________________________________ dropout (Dropout) (None, 1, 1, 4096) 0 
_________________________________________________________________ conv2d_14 (Conv2D) (None, 1, 1, 4096) 16781312 _________________________________________________________________ dropout_1 (Dropout) (None, 1, 1, 4096) 0 _________________________________________________________________ conv2d_15 (Conv2D) (None, 1, 1, 2622) 10742334 _________________________________________________________________ flatten (Flatten) (None, 2622) 0 _________________________________________________________________ activation (Activation) (None, 2622) 0 ================================================================= Total params: 145,002,878 Trainable params: 145,002,878 Non-trainable params: 0 _________________________________________________________________
# Embedding model: same input as the classifier, but the output is taken from
# the second-to-last layer (Flatten), i.e. the 2622 values before the softmax
vgg_face = Model(inputs=vgg_model.layers[0].input, outputs=vgg_model.layers[-2].output)
ind = random.choice(range(len(data['images'])))
# display random image from our dataset
display(img=data['images'][ind], label=data['labels'][ind])
# keep the sampled image in `img`; the following cells work on it
img=data['images'][ind]
Shape: (299, 299, 3)
# Image pixels range between 0 - 255 -> Scale to 0 - 1 to use with the vgg model
sns.set()
# flatten so that all pixel values go into one histogram
sns.histplot(img.flatten())
plt.title('Image Pixel Distribution', fontsize=15)
plt.show()
# get embedding vector for the random image in the data using our pre-trained VGG Face model
HEIGHT = 224
WIDTH = 224
# scaling pixel values from original scale: [0-255] to range: [0-1]
print('Original Shape: ', img.shape)
img = (img / 255.).astype(np.float32)
# cv2.resize takes dsize as (width, height), not (height, width)
img = cv2.resize(img, dsize=(WIDTH, HEIGHT))
print('Scaled Shape: ', img.shape)
# get the embedding vector for the above image using the vgg_face model and print the shape
# (predict works on batches: add a batch axis, then take the single result row)
img_emb = vgg_face.predict(np.expand_dims(img, axis=0))[0]
print('Image Embedding Shape: ', img_emb.shape)
Original Shape: (299, 299, 3) Scaled Shape: (224, 224, 3) Image Embedding Shape: (2622,)
pprint(img_emb, compact=True)
array([ 0.01217856, -0.02279992, -0.00221974, ..., -0.02014877,
-0.01565544, -0.01233729], dtype=float32)
# generate embeddings for all images in the dataset
n_images = len(data['images'])
# pre-allocate one row per image, sized from the sample embedding computed
# above; rows of images that fail below simply stay all-zero
embeddings = np.zeros((n_images, img_emb.shape[0]))
failed_indices = []  # indices whose embedding could not be computed
for idx in trange(n_images, ncols=100):
    try:
        image = data['images'][idx]
        image = (image / 255.).astype(np.float32)
        # cv2.resize takes dsize as (width, height)
        image = cv2.resize(image, dsize=(WIDTH, HEIGHT))
        embeddings[idx] = vgg_face.predict(np.expand_dims(image, axis=0))[0]
    except Exception as e:
        # best effort: keep going, but remember which image failed so the
        # all-zero row can be identified (and excluded) later
        print(e)
        failed_indices.append(idx)
if failed_indices:
    print(f'{len(failed_indices)} image(s) could not be embedded: {failed_indices}')
100%|█████████████████████████████████████████████████████████| 10770/10770 [15:03<00:00, 11.92it/s]
embeddings[1].shape
(2622,)
from numpy.linalg import norm
def l2_distance(x, y):
    '''Return the squared Euclidean (L2) distance between ``x`` and ``y``.'''
    delta = x - y
    return np.square(delta).sum()
def cosine_similarity(x,y):
    '''Return ``1 - cos(x, y)`` for two vectors.

    NOTE: despite its name this is a cosine *distance*: the value is 0 when
    the vectors point in the same direction and grows as they diverge.
    '''
    scale = norm(x) * norm(y)
    cosine = np.dot(x, y) / scale
    return 1 - cosine
# registry of the available metrics, keyed by function name
distances = {metric_fn.__name__: metric_fn for metric_fn in (l2_distance, cosine_similarity)}
distances
{'l2_distance': <function __main__.l2_distance(x, y)>,
'cosine_similarity': <function __main__.cosine_similarity(x, y)>}
def distance_pair(idx1, idx2, metric='l2_distance',
                  embeddings=embeddings):
    '''Plot two dataset images side by side with their embedding distance.

    idx1, idx2 : indices into ``data['images']`` / ``data['labels']`` and
                 into ``embeddings``
    metric     : key into the ``distances`` registry
    embeddings : embedding matrix; note that the default is bound to the
                 global ``embeddings`` object at function definition time

    Raises ValueError when ``metric`` is not a key of ``distances``.
    '''
    distance = distances.get(metric)
    if distance is None:
        # fail with a clear message instead of "'NoneType' is not callable"
        raise ValueError(
            f'Unknown metric {metric!r}; expected one of {sorted(distances)}')

    plt.figure(figsize=(8, 3))
    plt.suptitle(f'Distance = {distance(embeddings[idx1], embeddings[idx2]):.3f}')
    # left panel shows idx1, right panel shows idx2
    for position, idx in ((121, idx1), (122, idx2)):
        plt.subplot(position)
        plt.axis('off')
        plt.imshow(data['images'][idx])
        plt.title(data['labels'][idx])
distance_pair(100, 1500)
distance_pair(100, 1500, metric='cosine_similarity')
distance_pair(100, 102)
distance_pair(100, 102, metric='cosine_similarity')
distance_pair(1500, 2000)
distance_pair(1500, 2000, metric='cosine_similarity')
# Label Encoding for the names (categorical var)
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
Y = le.fit_transform(data['labels'])
Y.shape
(10770,)
le.classes_
array(['Aaron Paul', 'Alexandra Daddario', 'Alvaro Morte',
'Alycia Debnam Carey ', 'Amanda Crew', 'Amaury Nolasco',
'Amber Heard ', 'Anna Gunn', 'Anne Hathaway', 'Barbara Palvin ',
'Bellamy Blake ', 'Benedict Cumberbatch', 'Betsy Brandt',
'Bill Gates', 'Brenton Thwaites', 'Brie Larson', 'Brit Marling',
'Bryan Cranston', 'Caity Lotz', 'Cameron Monaghan',
'Chadwick Boseman ', 'Chance Perdomo', 'Chris Evans',
'Chris Pratt', 'Cobie Smulders', 'Danielle Panabaker',
'Dave Franco', 'David Mazouz', 'Dominic Purcell', 'Drake',
'Dua Lipa ', 'Dwayne Johnson', 'Eliza Taylor', 'Elizabeth Olsen ',
'Elon Musk', 'Emilia Clarke', 'Emily Bett Rickards', 'Emma Stone',
'Emma Watson ', 'Gal Gadot ', 'Grant Gustin ', 'Gwyneth Paltrow',
'Henry Cavil', 'Jason Isaacs', 'Jason Momoa', 'Jeff Bezos',
'Jeremy Renner', 'Jesse Eisenberg', 'Jim Parsons', 'Jon Bernthal',
'Josh Radnor', 'Kiernan Shipka', 'Kit Harington',
'Kristen Stewart ', 'Krysten Ritter', 'Kumail Nanjiani',
'Lindsey Morgan ', 'Maisie Williams', 'Margot Robbie ',
'Maria Pedraza', 'Mark Ruffalo', 'Mark Zuckerberg', 'Martin Starr',
'Melissa Benoit', 'Miguel Herran', 'Mike Colter',
'Millie Bobby Brown', 'Morena Baccarin', 'Morgan Freeman',
'Natalie Portman', 'Neil Patrick Harris', 'Paul Rudd',
'Pedro Alonso', 'Peter Dinklage', 'Rami Melek', 'Rihanna',
'Rj Mitte', 'Robert Downey Jr ', 'Robert Knepper', 'Robin Taylor',
'Ryan Reynolds', 'Sarah Wayne Callies', 'Scarlett Johansson',
'Sean Pertwee', 'Sebastian Stan', 'Selena Gomez', 'Shakira',
'Sophie Turner', 'Stephen Amell', 'Sundar Pichai',
'Tati Gabrielle', 'Taylor Swift', 'Thomas Middleditch',
'Tom Cavanagh', 'Tom Holland ', 'Ursula Corbero',
'Wentworth Miller', 'Willa Holland', 'William Fichtner', 'Zendaya'],
dtype='<U20')
np.unique(Y) # 100 names converted to encodings
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99],
dtype=int64)
# split into train and test before applying PCA to prevent data leakage and inflated results
SPLIT = 0.1 # 10% of images go into test set
n_samples = len(data['images'])
# take a sample of indices from the whole set to use in test set
test_indices = np.array(random.sample(range(n_samples), int(n_samples * SPLIT)))
# diff (all - test_indices => train_indices); setdiff1d returns them sorted,
# so the train order does not depend on set iteration order
train_indices = np.setdiff1d(np.arange(n_samples), test_indices)
# Split using the indices
X_train = embeddings[train_indices]
X_test = embeddings[test_indices]
X_train.shape, X_test.shape
((9693, 2622), (1077, 2622))
y_train = Y[train_indices]
y_test = Y[test_indices]
y_train.shape, y_test.shape
((9693,), (1077,))
# scale the values in embeddings to use PCA
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train_scaled = sc.fit_transform(X_train)
X_test_scaled = sc.transform(X_test) # do not fit on testing data to prevent data leakage
# Principal Components Decomposition
from sklearn.decomposition import PCA
pca = PCA()
X_train_pca = pca.fit_transform(X_train_scaled)
exp_var_pca = pca.explained_variance_ratio_
# Cumulative sum of the explained-variance ratios, used for the step plot that
# visualizes the variance explained by the leading principal components
cum_sum_eigenvalues = np.cumsum(exp_var_pca)
plt.bar(range(0,len(exp_var_pca)), exp_var_pca, alpha=0.5, align='center', label='Individual explained variance')
plt.step(range(0,len(cum_sum_eigenvalues)), cum_sum_eigenvalues, where='mid', label='Cumulative explained variance')
plt.ylabel('Explained variance ratio')
plt.xlabel('Principal component index')
plt.legend(loc='best')
plt.tight_layout()
plt.show()
EXPLAINED_THRESH = 0.99
# solve and find the number of components needed for 99% explained variance (EXPLAINED_THRESH)
pca = PCA(n_components=EXPLAINED_THRESH, svd_solver='full')
X_train_pc = pca.fit_transform(X_train_scaled)
#pca.explained_variance_ratio_
print(f"Principal Components: {pca.n_components_}"
f"\nTotal explained variance = {pca.explained_variance_ratio_.sum():.6f}")
Principal Components: 901 Total explained variance = 0.990003
X_test_pc = pca.transform(X_test_scaled) # transform test data
X_train_pc.shape, X_test_pc.shape
((9693, 901), (1077, 901))
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
# hyper parameter tuning
# gamma is not used by the linear kernel, so each kernel gets its own grid;
# a single combined grid would refit the same linear model once per gamma
parameters = [
    {'kernel': ['linear'], 'C': [1, 10, 100]},
    {'kernel': ['rbf'], 'C': [1, 10, 100], 'gamma': [1, 0.1, 0.01]},
]
clf = GridSearchCV(SVC(), parameters, cv=6, verbose=10, n_jobs=-1)
clf.fit(X_train_pc, y_train)
Fitting 6 folds for each of 18 candidates, totalling 108 fits
GridSearchCV(cv=6, estimator=SVC(), n_jobs=-1,
param_grid={'C': [1, 10, 100], 'gamma': [1, 0.1, 0.01],
'kernel': ['linear', 'rbf']},
verbose=10)
clf.best_params_
{'C': 1, 'gamma': 1, 'kernel': 'linear'}
# refit on the full training set with the hyper-parameters selected by the
# grid search, rather than a hand-copied version that can go stale
svc = SVC(**clf.best_params_, probability=False, random_state=seed)
svc.fit(X_train_pc, y_train)
SVC(C=1, gamma=1, kernel='linear', random_state=1996)
def scores(model):
    '''Print the accuracy of ``model`` on the PCA-transformed train and test sets.'''
    fit_accuracy = model.score(X_train_pc, y_train)
    holdout_accuracy = model.score(X_test_pc, y_test)
    report = (f'Train: {fit_accuracy*100:.2f}%',
              f'Test: {holdout_accuracy*100:.2f}%')
    print('\n'.join(report))
def plot_confusion(y_true, y_pred, title=''):
    """Plot the confusion matrix of ``y_pred`` against ``y_true`` as a heatmap.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.
        title: Text drawn above the heatmap.
    """
    sns.set()
    plt.figure(figsize=(35, 35))
    conf = confusion_matrix(y_true, y_pred)
    # confusion_matrix puts true labels on rows and predictions on columns,
    # and heatmap draws rows along y and columns along x. The matrix is
    # therefore plotted as-is; transposing it would contradict the axis
    # labels set below.
    ax = sns.heatmap(
        conf,
        annot=True,
        fmt='.0f',
        cmap=sns.color_palette("rocket", as_cmap=True),
    )
    ax.set_title(title, y=1.20, fontsize=40)
    plt.xlabel('Predicted Values')
    plt.ylabel('True Values')
    plt.show()
# Print train and test accuracy of the fitted SVC.
scores(svc)
Train: 99.98% Test: 96.47%
# Evaluate on the test set: confusion matrix, then per-class metrics.
y_pred = svc.predict(X_test_pc)
plot_confusion(
    y_test,
    y_pred,
    title='Confusion matrix (SVC trained on Principal Components)',
)
report = classification_report(y_test, y_pred, target_names=le.classes_)
print(f'Classification Report: \n{report}')
Classification Report:
precision recall f1-score support
Aaron Paul 0.94 1.00 0.97 17
Alexandra Daddario 0.93 1.00 0.96 13
Alvaro Morte 1.00 1.00 1.00 8
Alycia Debnam Carey 0.93 1.00 0.97 14
Amanda Crew 1.00 0.93 0.96 14
Amaury Nolasco 1.00 1.00 1.00 13
Amber Heard 1.00 1.00 1.00 16
Anna Gunn 1.00 1.00 1.00 7
Anne Hathaway 1.00 1.00 1.00 13
Barbara Palvin 1.00 0.92 0.96 13
Bellamy Blake 0.89 1.00 0.94 8
Benedict Cumberbatch 1.00 0.89 0.94 9
Betsy Brandt 1.00 0.89 0.94 9
Bill Gates 0.88 1.00 0.93 7
Brenton Thwaites 1.00 1.00 1.00 11
Brie Larson 0.93 0.93 0.93 15
Brit Marling 0.95 1.00 0.97 19
Bryan Cranston 1.00 1.00 1.00 9
Caity Lotz 0.90 0.90 0.90 10
Cameron Monaghan 1.00 1.00 1.00 18
Chadwick Boseman 1.00 1.00 1.00 17
Chance Perdomo 1.00 1.00 1.00 7
Chris Evans 1.00 1.00 1.00 4
Chris Pratt 1.00 1.00 1.00 14
Cobie Smulders 1.00 1.00 1.00 18
Danielle Panabaker 1.00 1.00 1.00 10
Dave Franco 0.92 1.00 0.96 12
David Mazouz 0.60 0.75 0.67 4
Dominic Purcell 1.00 1.00 1.00 8
Drake 0.75 1.00 0.86 3
Dua Lipa 0.83 1.00 0.91 5
Dwayne Johnson 0.94 1.00 0.97 17
Eliza Taylor 0.89 0.89 0.89 9
Elizabeth Olsen 1.00 0.92 0.96 12
Elon Musk 1.00 1.00 1.00 9
Emilia Clarke 1.00 0.74 0.85 23
Emily Bett Rickards 0.86 1.00 0.92 6
Emma Stone 1.00 0.93 0.96 14
Emma Watson 0.82 1.00 0.90 9
Gal Gadot 0.95 1.00 0.98 20
Grant Gustin 1.00 0.89 0.94 19
Gwyneth Paltrow 0.89 1.00 0.94 16
Henry Cavil 1.00 1.00 1.00 7
Jason Isaacs 0.94 1.00 0.97 15
Jason Momoa 1.00 1.00 1.00 19
Jeff Bezos 1.00 1.00 1.00 4
Jeremy Renner 1.00 1.00 1.00 12
Jesse Eisenberg 1.00 0.92 0.96 12
Jim Parsons 0.88 1.00 0.93 7
Jon Bernthal 1.00 0.71 0.83 7
Josh Radnor 1.00 1.00 1.00 15
Kiernan Shipka 1.00 0.89 0.94 18
Kit Harington 1.00 0.92 0.96 13
Kristen Stewart 0.88 1.00 0.93 7
Krysten Ritter 0.90 1.00 0.95 9
Kumail Nanjiani 1.00 1.00 1.00 7
Lindsey Morgan 1.00 1.00 1.00 6
Maisie Williams 1.00 1.00 1.00 16
Margot Robbie 0.95 0.95 0.95 20
Maria Pedraza 0.86 1.00 0.92 6
Mark Ruffalo 1.00 1.00 1.00 11
Mark Zuckerberg 1.00 1.00 1.00 6
Martin Starr 1.00 1.00 1.00 6
Melissa Benoit 0.93 1.00 0.96 13
Miguel Herran 0.91 1.00 0.95 10
Mike Colter 1.00 1.00 1.00 6
Millie Bobby Brown 1.00 1.00 1.00 5
Morena Baccarin 1.00 1.00 1.00 8
Morgan Freeman 1.00 1.00 1.00 8
Natalie Portman 1.00 0.89 0.94 9
Neil Patrick Harris 1.00 1.00 1.00 12
Paul Rudd 1.00 1.00 1.00 11
Pedro Alonso 1.00 1.00 1.00 7
Peter Dinklage 1.00 1.00 1.00 7
Rami Melek 1.00 0.85 0.92 13
Rihanna 0.90 0.90 0.90 10
Rj Mitte 1.00 0.67 0.80 9
Robert Downey Jr 1.00 1.00 1.00 13
Robert Knepper 0.89 1.00 0.94 8
Robin Taylor 1.00 1.00 1.00 10
Ryan Reynolds 1.00 1.00 1.00 9
Sarah Wayne Callies 1.00 1.00 1.00 10
Scarlett Johansson 0.86 0.86 0.86 14
Sean Pertwee 1.00 1.00 1.00 7
Sebastian Stan 1.00 1.00 1.00 9
Selena Gomez 0.67 1.00 0.80 4
Shakira 1.00 1.00 1.00 3
Sophie Turner 0.95 0.95 0.95 19
Stephen Amell 1.00 1.00 1.00 6
Sundar Pichai 1.00 1.00 1.00 8
Tati Gabrielle 1.00 1.00 1.00 6
Taylor Swift 1.00 1.00 1.00 14
Thomas Middleditch 1.00 1.00 1.00 8
Tom Cavanagh 1.00 0.92 0.96 13
Tom Holland 1.00 1.00 1.00 11
Ursula Corbero 1.00 1.00 1.00 6
Wentworth Miller 0.93 0.93 0.93 15
Willa Holland 0.90 1.00 0.95 9
William Fichtner 1.00 1.00 1.00 15
Zendaya 1.00 0.90 0.95 10
accuracy 0.96 1077
macro avg 0.96 0.97 0.96 1077
weighted avg 0.97 0.96 0.96 1077
# Load the first external test image and show it.
img_path = Path('./data/Part 2 Test Images/Part_2_-_Test_Image_-_Dwayne_Johnson4.jpg')
test_img_dwayne = imread(img_path)
# NOTE(review): `display` is presumably the notebook's own image helper
# defined earlier (it accepts label/shape keywords), not IPython's - confirm.
display(test_img_dwayne, label='Dwayne Johnson')
Shape: (299, 299, 3)
# Load the second external test image and show it.
# NOTE(review): this file name has no underscore before the first dash,
# unlike the Dwayne Johnson file - presumably that is how the file is named
# on disk; confirm before "fixing" it.
img_path = Path('./data/Part 2 Test Images/Part_2-_Test_Image_-_Benedict_Cumberbatch9.jpg')
test_img_cumberbatch = imread(img_path)
display(test_img_cumberbatch, label='Benedict Cumberbatch')
Shape: (299, 299, 3)
# predict
def recognize_face(img, actual=None):
    """Predict who is shown in ``img`` and display the image with the result.

    Pipeline: divide pixels by 255 -> resize to the network input size ->
    ``vgg_face`` embedding -> ``sc`` scaling -> ``pca`` projection ->
    ``svc`` prediction -> decode the class index with ``le``.

    Args:
        img: Image array. Pixel values are divided by 255, so presumably
            they are in the 0-255 range - confirm against callers.
        actual: Optional ground-truth name; when given it is shown on the
            line above the prediction.
    """
    # scale the pixel values
    image = (img / 255.).astype(np.float32)
    # resize to the model input size; cv2.resize expects dsize as
    # (width, height), not (height, width)
    image = cv2.resize(image, dsize=(WIDTH, HEIGHT))
    # get the embedding for a batch of one image
    embedding = vgg_face.predict(np.expand_dims(image, axis=0))[0]
    # scale using the fitted StandardScaler
    scaled = sc.transform(embedding.reshape(1, -1))
    # get the principal components
    pc = pca.transform(scaled)
    # predict using the trained SVC model
    encoded = svc.predict(pc)
    # inverse transform the encoding to the corresponding name
    name = le.inverse_transform(encoded)[0]
    caption = f'Prediction: {name}'
    if actual is not None:
        caption = f'Actual: {actual}\n' + caption
    display(img, label=caption, shape=False)
# recognise the person in the first test image
recognize_face(test_img_dwayne)
# recognise the person in the second test image
recognize_face(test_img_cumberbatch)
# test on one randomly chosen image of every person
labels = list(images)
imgs = [random.choice(images[person]) for person in labels]
for test_img, label in zip(imgs, labels):
    recognize_face(test_img, actual=label)